import patchAnchorElements from './patchAnchorElements';
import patchBreakElements from './patchBreakElements';
import patchElementInlineStyles from './patchElementInlineStyles';
import patchListElements from './patchListElements';
import patchMathElements from './patchMathElements';
import patchParagraphElements from './patchParagraphElements';
import patchStyleElements from './patchStyleElements';
import patchTableElements from './patchTableElements';
import { DOM_ATTRIBUTE_SIZE, HAIR_SPACE_CHAR, SPACER_SIZE_TAB } from './SpacerMarkSpec';
import toSafeHTMLDocument from './toSafeHTMLDocument';

// Detects an opening <body> tag anywhere in the source markup.
const HTML_BODY_PATTERN = /<body[\s>]/i;
// Any run of underscores; normalizeHTML hands each run to replaceNOBR.
const LONG_UNDERLINE_PATTERN = /_+/g;
// Workaround for "&nbsp;&nbsp;......&nbsp;" indentation: each group of six
// consecutive "&nbsp;" entities is replaced with a row of tab spacers.
const LONG_TAB_SPACE_PATTERN = /(&nbsp;){6}/g;
// Replacement markup for one matched group: five tab-spacer spans back to back.
const TAB_SPACER_HTML = `<span ${DOM_ATTRIBUTE_SIZE}="${SPACER_SIZE_TAB}">${HAIR_SPACE_CHAR}</span>`.repeat(5);

/**
 * Replacer callback for runs of underscores.
 *
 * Workaround that turns a long "_______" run into non-wrapping text so it
 * appears like a horizontal line. Runs shorter than 20 characters are
 * returned untouched.
 *
 * @param matched The matched run of underscores.
 * @returns The run wrapped in `<nobr>` plus a trailing space when it is at
 *   least 20 characters long, otherwise the run itself.
 */
function replaceNOBR(matched: string): string {
  const isLongRun = (matched?.length ?? 0) >= 20;

  // The trailing space gives the user a place to escape the <nobr />.
  return isLongRun ? `<nobr>${matched}</nobr> ` : matched;
}

// Paragraph class name -> heading tag that should replace the paragraph.
const classToTagMap: Record<string, string> = {
  p1: 'h1',
  p2: 'h2'
};

/**
 * Replaces every `<p>` whose class is listed in `classToTagMap` with the
 * mapped heading element.
 *
 * The paragraph's child nodes are moved into the new heading; nothing else
 * from the paragraph (for example its attributes) is carried over. The
 * document is modified in place.
 *
 * @param doc The document to patch.
 */
function patchAppleNotesHeadings(doc: Document): void {
  Object.entries(classToTagMap).forEach(([className, headingTag]) => {
    // Snapshot the matches up front because the loop mutates the tree.
    const paragraphs = Array.from(doc.querySelectorAll(`p.${className}`));

    for (const paragraph of paragraphs) {
      const heading = doc.createElement(headingTag);

      // appendChild moves the node, so firstChild advances on every pass.
      let child = paragraph.firstChild;
      while (child) {
        heading.appendChild(child);
        child = paragraph.firstChild;
      }

      paragraph.parentNode?.replaceChild(heading, paragraph);
    }
  });
}

// Matches an empty paragraph `<p class="p4"><br></p>`, plus any whitespace
// after it, unless the next thing after that whitespace is another
// `<p class="p4"><br></p>`.
//
// The negative lookahead sits directly after `</p>` and skips the whitespace
// itself. If a greedy `\s*` preceded the lookahead, the regex engine could
// backtrack it to zero characters, the lookahead would then see whitespace
// instead of `<p`, and the exclusion would silently fail whenever the two
// paragraphs are separated by a newline.
const redundantNewlineBeforeContentPattern = /<p\s+class="p4">\s*<br>\s*<\/p>(?!\s*<p\s+class="p4">\s*<br>\s*<\/p>)\s*/gi;

/**
 * Strips empty `<p class="p4"><br></p>` paragraphs that directly precede
 * other content (or the end of the input).
 *
 * In a run of consecutive empty `p4` paragraphs only the last one is removed;
 * the earlier ones are kept, whether or not whitespace separates them.
 *
 * @param html Raw HTML markup.
 * @returns The markup with the redundant empty paragraphs removed.
 */
function removeRedundantNewlines(html: string): string {
  return html.replace(redundantNewlineBeforeContentPattern, '');
}

/**
 * Swaps presentational `<b>` / `<i>` elements for `<strong>` / `<em>`.
 *
 * All `<b>` elements are processed first, then all `<i>` elements. Each
 * element's child nodes are moved into the replacement; the original
 * element's attributes are not copied. The document is modified in place.
 *
 * @param doc The document to patch.
 */
function patchBoldAndItalicTags(doc: Document): void {
  const tagPairs: Array<[string, string]> = [
    ['b', 'strong'],
    ['i', 'em']
  ];

  for (const [legacyTag, semanticTag] of tagPairs) {
    // Snapshot the matches up front because the loop mutates the tree.
    const legacyElements = Array.from(doc.querySelectorAll(legacyTag));

    for (const legacyEl of legacyElements) {
      const semanticEl = doc.createElement(semanticTag);

      // appendChild moves the node, so firstChild advances on every pass.
      for (let child = legacyEl.firstChild; child; child = legacyEl.firstChild) {
        semanticEl.appendChild(child);
      }

      legacyEl.parentNode?.replaceChild(semanticEl, legacyEl);
    }
  }
}

/**
 * Normalizes raw HTML into `<body>…</body>` markup.
 *
 * Steps, in order:
 *  1. String-level clean-up: strip redundant empty paragraphs, wrap long
 *     underscore runs in `<nobr>`, and replace groups of "&nbsp;" with tab
 *     spacers.
 *  2. Parse the result with `toSafeHTMLDocument`.
 *  3. Run the DOM patchers over the parsed document.
 *  4. Serialize the document's `<body>`.
 *
 * @param html Raw HTML, either a full page or a snippet.
 * @returns The serialized `<body>` element. When the source itself contained
 *   a `<body>` tag, the parsed element is serialized as a whole; otherwise
 *   its inner HTML is wrapped in a bare `<body>`. Returns the string
 *   'Unsupported HTML content' when no document or no `<body>` is available.
 */
export default function normalizeHTML(html: string): string {
  let markup = html;

  try {
    markup = removeRedundantNewlines(markup);
  } catch (error) {
    // Best effort: carry on with the untouched markup.
    console.error('Failed to remove redundant newlines', error);
  }

  // Checked before the rewrites below and before parsing.
  const sourceIsPage = HTML_BODY_PATTERN.test(markup);

  markup = markup
    // Long "_____" runs become non-wrapping text.
    .replace(LONG_UNDERLINE_PATTERN, replaceNOBR)
    // Every group matched by LONG_TAB_SPACE_PATTERN becomes a row of tab spacers.
    .replace(LONG_TAB_SPACE_PATTERN, TAB_SPACER_HTML);

  const doc = toSafeHTMLDocument(markup);
  if (!doc) {
    return 'Unsupported HTML content';
  }

  // Apple notes
  patchAppleNotesHeadings(doc);
  patchBoldAndItalicTags(doc);
  // styles
  patchStyleElements(doc);
  patchElementInlineStyles(doc);
  // contents
  patchAnchorElements(doc);
  patchBreakElements(doc);
  patchListElements(doc);
  patchParagraphElements(doc);
  patchTableElements(doc);
  try {
    patchMathElements(doc);
  } catch (error) {
    // A failure while patching math must not abort normalization.
    console.error('Failed to patch math elements', error);
  }

  const body = doc.getElementsByTagName('body')[0];
  if (!body) {
    // <body /> should always be generated by doc.
    return 'Unsupported HTML content';
  }

  if (sourceIsPage) {
    // Source HTML contains <body />, so it is assumed to be a complete page
    // whose <body /> may carry styles describing the page layout. Moving the
    // element under a scratch <html> lets innerHTML serialize the <body>
    // tag itself.
    const frag = doc.createElement('html');
    frag.appendChild(body);
    return frag.innerHTML;
  }

  // HTML snippet only.
  return `<body>${body.innerHTML}</body>`;
}
